# Load the birth data set from the local course folder.
BirthdataNC <- read.csv(
  file = "/Users/dingyuanzhang/Documents/S19_Eco220L/BirthdataNC.csv"
)

# Per-gender summary of weight: mean and standard deviation, each rounded to
# three decimal places.
statbygender <- summarise(
  group_by(BirthdataNC, gender),
  avgweight = round(mean(weight), digits = 3),
  sdweight = round(sd(weight), digits = 3)
)
statbygender
## gender avgweight sdweight
# Interactive histogram of weight by gender on the density scale, with a
# kernel density curve overlaid for each gender.
# ggplot() does not use dplyr groups (the `fill` aesthetic already splits the
# data by gender), so the data frame is passed in ungrouped.
# NOTE(review): geom_histogram() stacks the per-gender bars by default while
# the density curves are not stacked, so bars and curves are not directly
# comparable; confirm whether position = "identity" was intended.
plot <- BirthdataNC %>%
  ggplot(aes(x = weight, fill = gender)) +
  # after_stat(density) replaces the deprecated `..density..` notation.
  geom_histogram(
    aes(y = after_stat(density)),
    breaks = seq(0, 12, by = 0.3)
  ) +
  # Dotted blue reference line at weight = 7.3; `linewidth` replaces the
  # deprecated `size` argument for line thickness.
  geom_vline(
    xintercept = 7.3,
    linetype = "dotted",
    color = "blue",
    linewidth = 1.5
  ) +
  geom_density(alpha = 0.5)
ggplotly(plot)
# You could also add these two layers to plot normal distributions, similar to what we did in class for the sampling distribution. There is no need to do that here, but this is how you would do it.
# stat_function(fun = dnorm, args = list(mean = 6.9, sd = 1.47), col=2)+ stat_function(fun = dnorm, args = list(mean = 7.3, sd = 1.51), col=4)+ labs(title="Histogram for sample means")+ theme_classic()
# Quartiles of weight for the whole data set
quantile(BirthdataNC[["weight"]])

# Quartiles by gender: keep only the weight column for each gender, then
# compute the default quantiles (0%, 25%, 50%, 75%, 100%) of each subset.
female <- select(filter(BirthdataNC, gender == "female"), weight)
male <- select(filter(BirthdataNC, gender == "male"), weight)
quantile(female[["weight"]])
quantile(male[["weight"]])
# Deciles of weight by gender.
# `deciles` holds the probabilities 0, 0.1, ..., 1 passed to quantile().
deciles <- seq(from = 0, to = 1, by = 0.1)
quantile(female[["weight"]], probs = deciles)
quantile(male[["weight"]], probs = deciles)
# Print a label, then store the 75th percentile of male weight and the 45th
# percentile of female weight.
# NOTE(review): the label mentions only the 75% percentile although `p45f` is
# the female 45th percentile, and neither stored value is printed here.
print("75% Percentiles for male and female")
p75m <- quantile(male[["weight"]], probs = 0.75)
p45f <- quantile(female[["weight"]], probs = 0.45)
## 0% 25% 50% 75% 100%
## 1.00 6.38 7.31 8.06 11.75
## 0% 25% 50% 75% 100%
## 1.00 6.25 7.13 7.75 11.63
## 0% 25% 50% 75% 100%
## 1.38 6.56 7.44 8.31 11.75
## 0% 10% 20% 30% 40% 50% 60% 70% 80% 90%
## 1.000 5.392 6.000 6.500 6.866 7.130 7.380 7.630 7.940 8.380
## 100%
## 11.630
## 0% 10% 20% 30% 40% 50% 60% 70% 80% 90%
## 1.380 5.476 6.310 6.866 7.190 7.440 7.810 8.130 8.488 8.964
## 100%
## 11.750
## [1] "75% Percentiles for male and female"
My study finds that the weight at the 75th percentile in the male population is 8.31 pounds. For the female population, the weight at the 45th percentile is 7 pounds.
What can you say about the distribution by looking at these graphs?
# Boxplot of weight by gender, interactive.
# `weight` must be mapped explicitly: with only `gender` mapped, no weight
# values are drawn and the plot does not match its title.
# ggplot() does not use dplyr groups, so the data frame is passed in ungrouped.
q <- BirthdataNC %>%
  ggplot(aes(x = gender, y = weight)) +
  geom_boxplot(aes(col = gender)) +
  labs(title = "Weight distribution by gender - Boxplot") +
  theme_classic() +
  # Flip the axes so the boxes are horizontal, with weight along the
  # horizontal axis.
  coord_flip()
ggplotly(q)
# The same comparison built directly with plotly: a box plot with weight on
# the x axis and gender on the y axis.
plotly_plot <- BirthdataNC %>%
  plot_ly(x = ~weight, y = ~gender, type = "box")
plotly_plot